{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "87b40510-b4e9-4a1c-9de6-20639e7c658a",
   "metadata": {
    "id": "87b40510-b4e9-4a1c-9de6-20639e7c658a"
   },
   "source": [
    "# LLM Calculator"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "Use can use this calculator for the following class of models:\n",
    "- [DeepSeek V3-like models](#scrollTo=bdfe6522-3073-4ba8-8521-cb53f94ae663)\n",
    "- [Mixtral-like models](#scrollTo=-L-QRkgDDy82)\n",
    "- [Dense models](#scrollTo=kOhEU-XtD1gc)"
   ],
   "metadata": {
    "id": "B3qf-2XNDSeI"
   },
   "id": "B3qf-2XNDSeI"
  },
  {
   "cell_type": "markdown",
   "id": "bdfe6522-3073-4ba8-8521-cb53f94ae663",
   "metadata": {
    "id": "bdfe6522-3073-4ba8-8521-cb53f94ae663"
   },
   "source": [
    "## DeepSeek V3-like models"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a69d769d-bc31-43d2-8d56-3d28b24156a3",
   "metadata": {
    "id": "a69d769d-bc31-43d2-8d56-3d28b24156a3"
   },
   "source": [
    "Enter your component values here:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "9e8cb7f3-9c79-49d1-9d96-b58a09299450",
   "metadata": {
    "id": "9e8cb7f3-9c79-49d1-9d96-b58a09299450",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041549612,
     "user_tz": 180,
     "elapsed": 3,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    }
   },
   "outputs": [],
   "source": [
    "batch_per_device = 8\n",
    "num_dense_layers = 3\n",
    "num_moe_layers = 58\n",
    "num_activations = 2\n",
    "max_target_length = 8192\n",
    "vocab_size = 129280\n",
    "emb_dim = 7168\n",
    "mlp_dim = 18432\n",
    "num_query_heads = 128\n",
    "num_kv_heads = 128\n",
    "q_lora_rank = 1536\n",
    "kv_lora_rank = 512\n",
    "qk_nope_head_dim = 128\n",
    "qk_rope_head_dim = 64\n",
    "v_head_dim = 128\n",
    "moe_mlp_dim = 2048\n",
    "shared_experts = 1\n",
    "num_experts = 256\n",
    "num_experts_per_tok = 8"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "617335ee-8d33-4213-bbce-c973557c7718",
   "metadata": {
    "id": "617335ee-8d33-4213-bbce-c973557c7718"
   },
   "source": [
    "Params:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f5160af8-37c0-4741-86fb-4ff3d81e7bac",
   "metadata": {
    "id": "f5160af8-37c0-4741-86fb-4ff3d81e7bac",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041549618,
     "user_tz": 180,
     "elapsed": 7,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    }
   },
   "outputs": [],
   "source": [
    "attention_qkv = (num_dense_layers + num_moe_layers) * (\n",
    "    emb_dim * q_lora_rank\n",
    "    + q_lora_rank * num_query_heads * (qk_nope_head_dim + qk_rope_head_dim)\n",
    "    + emb_dim * (kv_lora_rank + qk_rope_head_dim)\n",
    "    + kv_lora_rank * num_query_heads * (qk_nope_head_dim + v_head_dim)\n",
    ")\n",
    "attention_projection = (num_dense_layers + num_moe_layers) * (emb_dim * num_query_heads * v_head_dim)\n",
    "dense_mlp = num_dense_layers * emb_dim * mlp_dim * (num_activations + 1)\n",
    "moe_gate_shared = num_moe_layers * (\n",
    "    emb_dim * num_experts + shared_experts * (num_activations + 1) * (emb_dim * moe_mlp_dim)\n",
    ")\n",
    "moe_routed = num_moe_layers * ((num_activations + 1) * (emb_dim * moe_mlp_dim))\n",
    "vocab_embedding = 2 * emb_dim * vocab_size\n",
    "total_params = (\n",
    "    (attention_qkv + attention_projection) + dense_mlp + moe_routed + num_experts * moe_routed + vocab_embedding\n",
    ")\n",
    "active_params = (\n",
    "    (attention_qkv + attention_projection)\n",
    "    + dense_mlp\n",
    "    + (moe_gate_shared + num_experts_per_tok * moe_routed)\n",
    "    + vocab_embedding\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "import io\n",
    "import pandas as pd\n",
    "\n",
    "params = pd.read_csv(\n",
    "    io.StringIO(\n",
    "        f\"\"\"\n",
    "Name, Value, Notes\n",
    "Attention (QKV),{attention_qkv:.2e},MLA\n",
    "Attention (Projection),{attention_projection:.2e},\n",
    "Dense MLP,{dense_mlp:.2e},dense layers\n",
    "MoE gate & shared,{moe_gate_shared:.2e},gate + shared experts\n",
    "MoE routed,{moe_routed:.2e},routed experts\n",
    "Vocab embedding,{vocab_embedding:.2e}, input & output embedding\n",
    "Total Params,{total_params:.2e},Attention + gates & shared + experts * routed + vocab embedding\n",
    "Active Params,{active_params:.2e},Attention + gates & shared + experts per token * routed + vocab embedding\n",
    "\"\"\"\n",
    "    )\n",
    ")\n",
    "params.fillna(\"\", inplace=True)"
   ],
   "metadata": {
    "id": "9NULVWhkSvdg",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041549987,
     "user_tz": 180,
     "elapsed": 356,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    }
   },
   "id": "9NULVWhkSvdg",
   "execution_count": 3,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "params"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 300
    },
    "id": "1sYIlF2NTIfB",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041550004,
     "user_tz": 180,
     "elapsed": 15,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    },
    "outputId": "69fea189-0677-454a-a2f2-03bab6fbc737"
   },
   "id": "1sYIlF2NTIfB",
   "execution_count": 4,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "                     Name         Value  \\\n",
       "0         Attention (QKV)  4.250000e+09   \n",
       "1  Attention (Projection)  7.160000e+09   \n",
       "2               Dense MLP  1.190000e+09   \n",
       "3       MoE gate & shared  2.660000e+09   \n",
       "4              MoE routed  2.550000e+09   \n",
       "5         Vocab embedding  1.850000e+09   \n",
       "6            Total Params  6.710000e+11   \n",
       "7           Active Params  3.760000e+10   \n",
       "\n",
       "                                               Notes  \n",
       "0                                                MLA  \n",
       "1                                                     \n",
       "2                                       dense layers  \n",
       "3                              gate + shared experts  \n",
       "4                                     routed experts  \n",
       "5                           input & output embedding  \n",
       "6  Attention + gates & shared + experts * routed ...  \n",
       "7  Attention + gates & shared + experts per token...  "
      ],
      "text/html": [
       "\n",
       "  <div id=\"df-20cd5430-0868-4699-ab49-c680c8517749\" class=\"colab-df-container\">\n",
       "    <div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Value</th>\n",
       "      <th>Notes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Attention (QKV)</td>\n",
       "      <td>4.250000e+09</td>\n",
       "      <td>MLA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Attention (Projection)</td>\n",
       "      <td>7.160000e+09</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Dense MLP</td>\n",
       "      <td>1.190000e+09</td>\n",
       "      <td>dense layers</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>MoE gate &amp; shared</td>\n",
       "      <td>2.660000e+09</td>\n",
       "      <td>gate + shared experts</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>MoE routed</td>\n",
       "      <td>2.550000e+09</td>\n",
       "      <td>routed experts</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Vocab embedding</td>\n",
       "      <td>1.850000e+09</td>\n",
       "      <td>input &amp; output embedding</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Total Params</td>\n",
       "      <td>6.710000e+11</td>\n",
       "      <td>Attention + gates &amp; shared + experts * routed ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Active Params</td>\n",
       "      <td>3.760000e+10</td>\n",
       "      <td>Attention + gates &amp; shared + experts per token...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>\n",
       "    <div class=\"colab-df-buttons\">\n",
       "\n",
       "  <div class=\"colab-df-container\">\n",
       "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-20cd5430-0868-4699-ab49-c680c8517749')\"\n",
       "            title=\"Convert this dataframe to an interactive table.\"\n",
       "            style=\"display:none;\">\n",
       "\n",
       "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
       "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
       "  </svg>\n",
       "    </button>\n",
       "\n",
       "  <style>\n",
       "    .colab-df-container {\n",
       "      display:flex;\n",
       "      gap: 12px;\n",
       "    }\n",
       "\n",
       "    .colab-df-convert {\n",
       "      background-color: #E8F0FE;\n",
       "      border: none;\n",
       "      border-radius: 50%;\n",
       "      cursor: pointer;\n",
       "      display: none;\n",
       "      fill: #1967D2;\n",
       "      height: 32px;\n",
       "      padding: 0 0 0 0;\n",
       "      width: 32px;\n",
       "    }\n",
       "\n",
       "    .colab-df-convert:hover {\n",
       "      background-color: #E2EBFA;\n",
       "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
       "      fill: #174EA6;\n",
       "    }\n",
       "\n",
       "    .colab-df-buttons div {\n",
       "      margin-bottom: 4px;\n",
       "    }\n",
       "\n",
       "    [theme=dark] .colab-df-convert {\n",
       "      background-color: #3B4455;\n",
       "      fill: #D2E3FC;\n",
       "    }\n",
       "\n",
       "    [theme=dark] .colab-df-convert:hover {\n",
       "      background-color: #434B5C;\n",
       "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
       "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
       "      fill: #FFFFFF;\n",
       "    }\n",
       "  </style>\n",
       "\n",
       "    <script>\n",
       "      const buttonEl =\n",
       "        document.querySelector('#df-20cd5430-0868-4699-ab49-c680c8517749 button.colab-df-convert');\n",
       "      buttonEl.style.display =\n",
       "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
       "\n",
       "      async function convertToInteractive(key) {\n",
       "        const element = document.querySelector('#df-20cd5430-0868-4699-ab49-c680c8517749');\n",
       "        const dataTable =\n",
       "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
       "                                                    [key], {});\n",
       "        if (!dataTable) return;\n",
       "\n",
       "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
       "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
       "          + ' to learn more about interactive tables.';\n",
       "        element.innerHTML = '';\n",
       "        dataTable['output_type'] = 'display_data';\n",
       "        await google.colab.output.renderOutput(dataTable, element);\n",
       "        const docLink = document.createElement('div');\n",
       "        docLink.innerHTML = docLinkHtml;\n",
       "        element.appendChild(docLink);\n",
       "      }\n",
       "    </script>\n",
       "  </div>\n",
       "\n",
       "\n",
       "    <div id=\"df-a884729a-393b-4880-8946-d3b4e60c9a47\">\n",
       "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-a884729a-393b-4880-8946-d3b4e60c9a47')\"\n",
       "                title=\"Suggest charts\"\n",
       "                style=\"display:none;\">\n",
       "\n",
       "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
       "     width=\"24px\">\n",
       "    <g>\n",
       "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
       "    </g>\n",
       "</svg>\n",
       "      </button>\n",
       "\n",
       "<style>\n",
       "  .colab-df-quickchart {\n",
       "      --bg-color: #E8F0FE;\n",
       "      --fill-color: #1967D2;\n",
       "      --hover-bg-color: #E2EBFA;\n",
       "      --hover-fill-color: #174EA6;\n",
       "      --disabled-fill-color: #AAA;\n",
       "      --disabled-bg-color: #DDD;\n",
       "  }\n",
       "\n",
       "  [theme=dark] .colab-df-quickchart {\n",
       "      --bg-color: #3B4455;\n",
       "      --fill-color: #D2E3FC;\n",
       "      --hover-bg-color: #434B5C;\n",
       "      --hover-fill-color: #FFFFFF;\n",
       "      --disabled-bg-color: #3B4455;\n",
       "      --disabled-fill-color: #666;\n",
       "  }\n",
       "\n",
       "  .colab-df-quickchart {\n",
       "    background-color: var(--bg-color);\n",
       "    border: none;\n",
       "    border-radius: 50%;\n",
       "    cursor: pointer;\n",
       "    display: none;\n",
       "    fill: var(--fill-color);\n",
       "    height: 32px;\n",
       "    padding: 0;\n",
       "    width: 32px;\n",
       "  }\n",
       "\n",
       "  .colab-df-quickchart:hover {\n",
       "    background-color: var(--hover-bg-color);\n",
       "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
       "    fill: var(--button-hover-fill-color);\n",
       "  }\n",
       "\n",
       "  .colab-df-quickchart-complete:disabled,\n",
       "  .colab-df-quickchart-complete:disabled:hover {\n",
       "    background-color: var(--disabled-bg-color);\n",
       "    fill: var(--disabled-fill-color);\n",
       "    box-shadow: none;\n",
       "  }\n",
       "\n",
       "  .colab-df-spinner {\n",
       "    border: 2px solid var(--fill-color);\n",
       "    border-color: transparent;\n",
       "    border-bottom-color: var(--fill-color);\n",
       "    animation:\n",
       "      spin 1s steps(1) infinite;\n",
       "  }\n",
       "\n",
       "  @keyframes spin {\n",
       "    0% {\n",
       "      border-color: transparent;\n",
       "      border-bottom-color: var(--fill-color);\n",
       "      border-left-color: var(--fill-color);\n",
       "    }\n",
       "    20% {\n",
       "      border-color: transparent;\n",
       "      border-left-color: var(--fill-color);\n",
       "      border-top-color: var(--fill-color);\n",
       "    }\n",
       "    30% {\n",
       "      border-color: transparent;\n",
       "      border-left-color: var(--fill-color);\n",
       "      border-top-color: var(--fill-color);\n",
       "      border-right-color: var(--fill-color);\n",
       "    }\n",
       "    40% {\n",
       "      border-color: transparent;\n",
       "      border-right-color: var(--fill-color);\n",
       "      border-top-color: var(--fill-color);\n",
       "    }\n",
       "    60% {\n",
       "      border-color: transparent;\n",
       "      border-right-color: var(--fill-color);\n",
       "    }\n",
       "    80% {\n",
       "      border-color: transparent;\n",
       "      border-right-color: var(--fill-color);\n",
       "      border-bottom-color: var(--fill-color);\n",
       "    }\n",
       "    90% {\n",
       "      border-color: transparent;\n",
       "      border-bottom-color: var(--fill-color);\n",
       "    }\n",
       "  }\n",
       "</style>\n",
       "\n",
       "      <script>\n",
       "        async function quickchart(key) {\n",
       "          const quickchartButtonEl =\n",
       "            document.querySelector('#' + key + ' button');\n",
       "          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
       "          quickchartButtonEl.classList.add('colab-df-spinner');\n",
       "          try {\n",
       "            const charts = await google.colab.kernel.invokeFunction(\n",
       "                'suggestCharts', [key], {});\n",
       "          } catch (error) {\n",
       "            console.error('Error during call to suggestCharts:', error);\n",
       "          }\n",
       "          quickchartButtonEl.classList.remove('colab-df-spinner');\n",
       "          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
       "        }\n",
       "        (() => {\n",
       "          let quickchartButtonEl =\n",
       "            document.querySelector('#df-a884729a-393b-4880-8946-d3b4e60c9a47 button');\n",
       "          quickchartButtonEl.style.display =\n",
       "            google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
       "        })();\n",
       "      </script>\n",
       "    </div>\n",
       "\n",
       "  <div id=\"id_e9586871-9a2a-48be-bd77-08cfe497b1e4\">\n",
       "    <style>\n",
       "      .colab-df-generate {\n",
       "        background-color: #E8F0FE;\n",
       "        border: none;\n",
       "        border-radius: 50%;\n",
       "        cursor: pointer;\n",
       "        display: none;\n",
       "        fill: #1967D2;\n",
       "        height: 32px;\n",
       "        padding: 0 0 0 0;\n",
       "        width: 32px;\n",
       "      }\n",
       "\n",
       "      .colab-df-generate:hover {\n",
       "        background-color: #E2EBFA;\n",
       "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
       "        fill: #174EA6;\n",
       "      }\n",
       "\n",
       "      [theme=dark] .colab-df-generate {\n",
       "        background-color: #3B4455;\n",
       "        fill: #D2E3FC;\n",
       "      }\n",
       "\n",
       "      [theme=dark] .colab-df-generate:hover {\n",
       "        background-color: #434B5C;\n",
       "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
       "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
       "        fill: #FFFFFF;\n",
       "      }\n",
       "    </style>\n",
       "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('params')\"\n",
       "            title=\"Generate code using this dataframe.\"\n",
       "            style=\"display:none;\">\n",
       "\n",
       "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
       "       width=\"24px\">\n",
       "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
       "  </svg>\n",
       "    </button>\n",
       "    <script>\n",
       "      (() => {\n",
       "      const buttonEl =\n",
       "        document.querySelector('#id_e9586871-9a2a-48be-bd77-08cfe497b1e4 button.colab-df-generate');\n",
       "      buttonEl.style.display =\n",
       "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
       "\n",
       "      buttonEl.onclick = () => {\n",
       "        google.colab.notebook.generateWithVariable('params');\n",
       "      }\n",
       "      })();\n",
       "    </script>\n",
       "  </div>\n",
       "\n",
       "    </div>\n",
       "  </div>\n"
      ],
      "application/vnd.google.colaboratory.intrinsic+json": {
       "type": "dataframe",
       "variable_name": "params",
       "summary": "{\n  \"name\": \"params\",\n  \"rows\": 8,\n  \"fields\": [\n    {\n      \"column\": \"Name\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 8,\n        \"samples\": [\n          \"Attention (Projection)\",\n          \"Vocab embedding\",\n          \"Attention (QKV)\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \" Value\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 234656946063.21432,\n        \"min\": 1190000000.0,\n        \"max\": 671000000000.0,\n        \"num_unique_values\": 8,\n        \"samples\": [\n          7160000000.0,\n          1850000000.0,\n          4250000000.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \" Notes\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 8,\n        \"samples\": [\n          \"\",\n          \" input & output embedding\",\n          \"MLA\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
      }
     },
     "metadata": {},
     "execution_count": 4
    }
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4add3c91-86b7-4a57-b41e-23b16271d733",
   "metadata": {
    "id": "4add3c91-86b7-4a57-b41e-23b16271d733"
   },
   "source": [
    "FLOPs/device:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "605e6ac5-49d1-4c0c-b310-f5e9ac4a787f",
   "metadata": {
    "id": "605e6ac5-49d1-4c0c-b310-f5e9ac4a787f",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041550030,
     "user_tz": 180,
     "elapsed": 6,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    }
   },
   "outputs": [],
   "source": [
    "one_mla_qkv = 2 * batch_per_device * max_target_length * (\n",
    "    emb_dim * q_lora_rank + q_lora_rank * num_query_heads * (qk_nope_head_dim + qk_rope_head_dim)\n",
    ") + 2 * batch_per_device * max_target_length * (\n",
    "    emb_dim * (kv_lora_rank + qk_rope_head_dim) + kv_lora_rank * num_query_heads * (qk_nope_head_dim + v_head_dim)\n",
    ")\n",
    "one_mla_attention = (\n",
    "    2\n",
    "    * batch_per_device\n",
    "    * max_target_length\n",
    "    * max_target_length\n",
    "    * num_query_heads\n",
    "    * (qk_nope_head_dim + qk_rope_head_dim + v_head_dim)\n",
    ")\n",
    "one_mla_projection = 2 * batch_per_device * max_target_length * emb_dim * num_query_heads * v_head_dim\n",
    "total_attention = (num_dense_layers + num_moe_layers) * (one_mla_qkv + one_mla_attention + one_mla_projection)\n",
    "one_dense_layer = (\n",
    "    2 * batch_per_device * max_target_length * mlp_dim * emb_dim * num_activations\n",
    "    + 2 * batch_per_device * max_target_length * mlp_dim * emb_dim\n",
    ")\n",
    "one_moe_layer = 2 * batch_per_device * max_target_length * emb_dim * num_experts + (\n",
    "    shared_experts + num_experts_per_tok\n",
    ") * (\n",
    "    2 * batch_per_device * max_target_length * moe_mlp_dim * emb_dim * num_activations\n",
    "    + 2 * batch_per_device * max_target_length * moe_mlp_dim * emb_dim\n",
    ")\n",
    "total_mlp = num_dense_layers * one_dense_layer + num_moe_layers * one_moe_layer\n",
    "vocab_embedding_flops = 2 * batch_per_device * max_target_length * emb_dim * vocab_size\n",
    "total_forward = vocab_embedding_flops + total_attention + total_mlp\n",
    "total_backward = 2 * total_forward\n",
    "total = total_forward + total_backward\n",
    "total_tflops = total / (10**12)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "05132d2b-330b-4526-9ccf-a6d2b1c5c784",
   "metadata": {
    "id": "05132d2b-330b-4526-9ccf-a6d2b1c5c784",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041550030,
     "user_tz": 180,
     "elapsed": 2,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    }
   },
   "outputs": [],
   "source": [
    "flops = pd.read_csv(\n",
    "    io.StringIO(\n",
    "        f\"\"\"\n",
    "Name, Value, Notes\n",
    "One MLA QKV,{one_mla_qkv:.2e},\n",
    "One MLA Attention,{one_mla_attention:.2e},\n",
    "One MLA Projection,{one_mla_projection:.2e},\n",
    "Total Attention,{total_attention:.2e},\n",
    "One Dense Layer,{one_dense_layer:.2e},\n",
    "One MoE Layer,{one_moe_layer:.2e},gate + shared + routed\n",
    "Total MLP,{total_mlp:.2e},dense_layers + moe_layers\n",
    "Vocab embedding,{vocab_embedding:.2e},\n",
    "Total forward,{total_forward:.2e},embedding + attention + feedforward\n",
    "Total backward,{total_backward:.2e},2x forward flops\n",
    "Total,{total:.2e},\n",
    "Total (TFLOPs),{total_tflops:.2e},\n",
    "\"\"\"\n",
    "    )\n",
    ")\n",
    "flops.fillna(\"\", inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "flops"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 426
    },
    "id": "B88uTqc7U1CE",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041550047,
     "user_tz": 180,
     "elapsed": 18,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    },
    "outputId": "6387d69b-4bce-4a51-f1d6-ee495d7f6447"
   },
   "id": "B88uTqc7U1CE",
   "execution_count": 7,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "                  Name         Value                                Notes\n",
       "0          One MLA QKV  9.130000e+12                                     \n",
       "1    One MLA Attention  4.400000e+13                                     \n",
       "2   One MLA Projection  1.540000e+13                                     \n",
       "3      Total Attention  4.180000e+15                                     \n",
       "4      One Dense Layer  5.200000e+13                                     \n",
       "5        One MoE Layer  5.220000e+13               gate + shared + routed\n",
       "6            Total MLP  3.180000e+15            dense_layers + moe_layers\n",
       "7      Vocab embedding  1.850000e+09                                     \n",
       "8        Total forward  7.480000e+15  embedding + attention + feedforward\n",
       "9       Total backward  1.500000e+16                     2x forward flops\n",
       "10               Total  2.240000e+16                                     \n",
       "11      Total (TFLOPs)  2.240000e+04                                     "
      ],
      "text/html": [
       "\n",
       "  <div id=\"df-dd39e97a-bf20-49d7-b44c-990a677f9f2e\" class=\"colab-df-container\">\n",
       "    <div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Value</th>\n",
       "      <th>Notes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>One MLA QKV</td>\n",
       "      <td>9.130000e+12</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>One MLA Attention</td>\n",
       "      <td>4.400000e+13</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>One MLA Projection</td>\n",
       "      <td>1.540000e+13</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Total Attention</td>\n",
       "      <td>4.180000e+15</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>One Dense Layer</td>\n",
       "      <td>5.200000e+13</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>One MoE Layer</td>\n",
       "      <td>5.220000e+13</td>\n",
       "      <td>gate + shared + routed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Total MLP</td>\n",
       "      <td>3.180000e+15</td>\n",
       "      <td>dense_layers + moe_layers</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Vocab embedding</td>\n",
       "      <td>1.850000e+09</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Total forward</td>\n",
       "      <td>7.480000e+15</td>\n",
       "      <td>embedding + attention + feedforward</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Total backward</td>\n",
       "      <td>1.500000e+16</td>\n",
       "      <td>2x forward flops</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Total</td>\n",
       "      <td>2.240000e+16</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Total (TFLOPs)</td>\n",
       "      <td>2.240000e+04</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>\n",
       "    <div class=\"colab-df-buttons\">\n",
       "\n",
       "  <div class=\"colab-df-container\">\n",
       "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-dd39e97a-bf20-49d7-b44c-990a677f9f2e')\"\n",
       "            title=\"Convert this dataframe to an interactive table.\"\n",
       "            style=\"display:none;\">\n",
       "\n",
       "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
       "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
       "  </svg>\n",
       "    </button>\n",
       "\n",
       "  <style>\n",
       "    .colab-df-container {\n",
       "      display:flex;\n",
       "      gap: 12px;\n",
       "    }\n",
       "\n",
       "    .colab-df-convert {\n",
       "      background-color: #E8F0FE;\n",
       "      border: none;\n",
       "      border-radius: 50%;\n",
       "      cursor: pointer;\n",
       "      display: none;\n",
       "      fill: #1967D2;\n",
       "      height: 32px;\n",
       "      padding: 0 0 0 0;\n",
       "      width: 32px;\n",
       "    }\n",
       "\n",
       "    .colab-df-convert:hover {\n",
       "      background-color: #E2EBFA;\n",
       "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
       "      fill: #174EA6;\n",
       "    }\n",
       "\n",
       "    .colab-df-buttons div {\n",
       "      margin-bottom: 4px;\n",
       "    }\n",
       "\n",
       "    [theme=dark] .colab-df-convert {\n",
       "      background-color: #3B4455;\n",
       "      fill: #D2E3FC;\n",
       "    }\n",
       "\n",
       "    [theme=dark] .colab-df-convert:hover {\n",
       "      background-color: #434B5C;\n",
       "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
       "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
       "      fill: #FFFFFF;\n",
       "    }\n",
       "  </style>\n",
       "\n",
       "    <script>\n",
       "      const buttonEl =\n",
       "        document.querySelector('#df-dd39e97a-bf20-49d7-b44c-990a677f9f2e button.colab-df-convert');\n",
       "      buttonEl.style.display =\n",
       "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
       "\n",
       "      async function convertToInteractive(key) {\n",
       "        const element = document.querySelector('#df-dd39e97a-bf20-49d7-b44c-990a677f9f2e');\n",
       "        const dataTable =\n",
       "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
       "                                                    [key], {});\n",
       "        if (!dataTable) return;\n",
       "\n",
       "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
       "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
       "          + ' to learn more about interactive tables.';\n",
       "        element.innerHTML = '';\n",
       "        dataTable['output_type'] = 'display_data';\n",
       "        await google.colab.output.renderOutput(dataTable, element);\n",
       "        const docLink = document.createElement('div');\n",
       "        docLink.innerHTML = docLinkHtml;\n",
       "        element.appendChild(docLink);\n",
       "      }\n",
       "    </script>\n",
       "  </div>\n",
       "\n",
       "\n",
       "    <div id=\"df-932a66e2-3a8f-4221-b60c-aca468c236cf\">\n",
       "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-932a66e2-3a8f-4221-b60c-aca468c236cf')\"\n",
       "                title=\"Suggest charts\"\n",
       "                style=\"display:none;\">\n",
       "\n",
       "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
       "     width=\"24px\">\n",
       "    <g>\n",
       "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
       "    </g>\n",
       "</svg>\n",
       "      </button>\n",
       "\n",
       "<style>\n",
       "  .colab-df-quickchart {\n",
       "      --bg-color: #E8F0FE;\n",
       "      --fill-color: #1967D2;\n",
       "      --hover-bg-color: #E2EBFA;\n",
       "      --hover-fill-color: #174EA6;\n",
       "      --disabled-fill-color: #AAA;\n",
       "      --disabled-bg-color: #DDD;\n",
       "  }\n",
       "\n",
       "  [theme=dark] .colab-df-quickchart {\n",
       "      --bg-color: #3B4455;\n",
       "      --fill-color: #D2E3FC;\n",
       "      --hover-bg-color: #434B5C;\n",
       "      --hover-fill-color: #FFFFFF;\n",
       "      --disabled-bg-color: #3B4455;\n",
       "      --disabled-fill-color: #666;\n",
       "  }\n",
       "\n",
       "  .colab-df-quickchart {\n",
       "    background-color: var(--bg-color);\n",
       "    border: none;\n",
       "    border-radius: 50%;\n",
       "    cursor: pointer;\n",
       "    display: none;\n",
       "    fill: var(--fill-color);\n",
       "    height: 32px;\n",
       "    padding: 0;\n",
       "    width: 32px;\n",
       "  }\n",
       "\n",
       "  .colab-df-quickchart:hover {\n",
       "    background-color: var(--hover-bg-color);\n",
       "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
       "    fill: var(--button-hover-fill-color);\n",
       "  }\n",
       "\n",
       "  .colab-df-quickchart-complete:disabled,\n",
       "  .colab-df-quickchart-complete:disabled:hover {\n",
       "    background-color: var(--disabled-bg-color);\n",
       "    fill: var(--disabled-fill-color);\n",
       "    box-shadow: none;\n",
       "  }\n",
       "\n",
       "  .colab-df-spinner {\n",
       "    border: 2px solid var(--fill-color);\n",
       "    border-color: transparent;\n",
       "    border-bottom-color: var(--fill-color);\n",
       "    animation:\n",
       "      spin 1s steps(1) infinite;\n",
       "  }\n",
       "\n",
       "  @keyframes spin {\n",
       "    0% {\n",
       "      border-color: transparent;\n",
       "      border-bottom-color: var(--fill-color);\n",
       "      border-left-color: var(--fill-color);\n",
       "    }\n",
       "    20% {\n",
       "      border-color: transparent;\n",
       "      border-left-color: var(--fill-color);\n",
       "      border-top-color: var(--fill-color);\n",
       "    }\n",
       "    30% {\n",
       "      border-color: transparent;\n",
       "      border-left-color: var(--fill-color);\n",
       "      border-top-color: var(--fill-color);\n",
       "      border-right-color: var(--fill-color);\n",
       "    }\n",
       "    40% {\n",
       "      border-color: transparent;\n",
       "      border-right-color: var(--fill-color);\n",
       "      border-top-color: var(--fill-color);\n",
       "    }\n",
       "    60% {\n",
       "      border-color: transparent;\n",
       "      border-right-color: var(--fill-color);\n",
       "    }\n",
       "    80% {\n",
       "      border-color: transparent;\n",
       "      border-right-color: var(--fill-color);\n",
       "      border-bottom-color: var(--fill-color);\n",
       "    }\n",
       "    90% {\n",
       "      border-color: transparent;\n",
       "      border-bottom-color: var(--fill-color);\n",
       "    }\n",
       "  }\n",
       "</style>\n",
       "\n",
       "      <script>\n",
       "        async function quickchart(key) {\n",
       "          const quickchartButtonEl =\n",
       "            document.querySelector('#' + key + ' button');\n",
       "          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
       "          quickchartButtonEl.classList.add('colab-df-spinner');\n",
       "          try {\n",
       "            const charts = await google.colab.kernel.invokeFunction(\n",
       "                'suggestCharts', [key], {});\n",
       "          } catch (error) {\n",
       "            console.error('Error during call to suggestCharts:', error);\n",
       "          }\n",
       "          quickchartButtonEl.classList.remove('colab-df-spinner');\n",
       "          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
       "        }\n",
       "        (() => {\n",
       "          let quickchartButtonEl =\n",
       "            document.querySelector('#df-932a66e2-3a8f-4221-b60c-aca468c236cf button');\n",
       "          quickchartButtonEl.style.display =\n",
       "            google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
       "        })();\n",
       "      </script>\n",
       "    </div>\n",
       "\n",
       "  <div id=\"id_8586fba5-4f68-4e9a-8519-65358c78b320\">\n",
       "    <style>\n",
       "      .colab-df-generate {\n",
       "        background-color: #E8F0FE;\n",
       "        border: none;\n",
       "        border-radius: 50%;\n",
       "        cursor: pointer;\n",
       "        display: none;\n",
       "        fill: #1967D2;\n",
       "        height: 32px;\n",
       "        padding: 0 0 0 0;\n",
       "        width: 32px;\n",
       "      }\n",
       "\n",
       "      .colab-df-generate:hover {\n",
       "        background-color: #E2EBFA;\n",
       "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
       "        fill: #174EA6;\n",
       "      }\n",
       "\n",
       "      [theme=dark] .colab-df-generate {\n",
       "        background-color: #3B4455;\n",
       "        fill: #D2E3FC;\n",
       "      }\n",
       "\n",
       "      [theme=dark] .colab-df-generate:hover {\n",
       "        background-color: #434B5C;\n",
       "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
       "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
       "        fill: #FFFFFF;\n",
       "      }\n",
       "    </style>\n",
       "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('flops')\"\n",
       "            title=\"Generate code using this dataframe.\"\n",
       "            style=\"display:none;\">\n",
       "\n",
       "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
       "       width=\"24px\">\n",
       "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
       "  </svg>\n",
       "    </button>\n",
       "    <script>\n",
       "      (() => {\n",
       "      const buttonEl =\n",
       "        document.querySelector('#id_8586fba5-4f68-4e9a-8519-65358c78b320 button.colab-df-generate');\n",
       "      buttonEl.style.display =\n",
       "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
       "\n",
       "      buttonEl.onclick = () => {\n",
       "        google.colab.notebook.generateWithVariable('flops');\n",
       "      }\n",
       "      })();\n",
       "    </script>\n",
       "  </div>\n",
       "\n",
       "    </div>\n",
       "  </div>\n"
      ],
      "application/vnd.google.colaboratory.intrinsic+json": {
       "type": "dataframe",
       "variable_name": "flops",
       "summary": "{\n  \"name\": \"flops\",\n  \"rows\": 12,\n  \"fields\": [\n    {\n      \"column\": \"Name\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 12,\n        \"samples\": [\n          \"Total\",\n          \"Total backward\",\n          \"One MLA QKV\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \" Value\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 7269977785542156.0,\n        \"min\": 22400.0,\n        \"max\": 2.24e+16,\n        \"num_unique_values\": 12,\n        \"samples\": [\n          2.24e+16,\n          1.5e+16,\n          9130000000000.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \" Notes\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 5,\n        \"samples\": [\n          \"gate + shared + routed\",\n          \"2x forward flops\",\n          \"dense_layers + moe_layers\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
      }
     },
     "metadata": {},
     "execution_count": 7
    }
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Mixtral-like models"
   ],
   "metadata": {
    "id": "-L-QRkgDDy82"
   },
   "id": "-L-QRkgDDy82"
  },
  {
   "cell_type": "markdown",
   "source": [
    "Enter your component values here:"
   ],
   "metadata": {
    "id": "PozJ84x5D6PI"
   },
   "id": "PozJ84x5D6PI"
  },
  {
   "cell_type": "code",
   "source": [
    "batch_per_device = 1\n",
    "num_dense_layers = 5\n",
    "max_target_length = 4096\n",
    "vocab_size = 32000\n",
    "emb_dim = 4096\n",
    "mlp_dim = 14336\n",
    "head_dim = 128\n",
    "num_query_heads = 32\n",
    "num_kv_heads = 8\n",
    "num_experts = 8\n",
    "num_experts_per_tok = 2"
   ],
   "metadata": {
    "id": "WXOKOvezD0y3",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041550066,
     "user_tz": 180,
     "elapsed": 18,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    }
   },
   "id": "WXOKOvezD0y3",
   "execution_count": 8,
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "source": [
    "Params:"
   ],
   "metadata": {
    "id": "YvdbsrBuES5X"
   },
   "id": "YvdbsrBuES5X"
  },
  {
   "cell_type": "code",
   "source": [
    "# Parameter counts. Symbols as in the table notes: D=emb_dim, N=num_query_heads,\n",
    "# K=num_kv_heads, H=head_dim, F=mlp_dim, V=vocab_size.\n",
    "\n",
    "# Q and O projections are D x (N*H) each; K and V projections are D x (K*H) each.\n",
    "attention_qkvo = num_dense_layers * (2 * emb_dim * num_query_heads * head_dim + 2 * emb_dim * num_kv_heads * head_dim)\n",
    "\n",
    "# Split the MLP into the router gate (D x num_experts) and the three D x F expert matmuls.\n",
    "# The FLOPs cell applies the gate once per layer and only scales the expert matmuls by the\n",
    "# number of experts used, so the parameter totals below treat the gate the same way.\n",
    "router_gate_params = num_dense_layers * emb_dim * num_experts\n",
    "expert_mlp_params = num_dense_layers * 3 * emb_dim * mlp_dim\n",
    "mlp = router_gate_params + expert_mlp_params\n",
    "\n",
    "# Input and output embedding tables.\n",
    "vocab_embedding = 2 * emb_dim * vocab_size\n",
    "\n",
    "one_expert = attention_qkvo + mlp + vocab_embedding\n",
    "# Previously `num_experts * mlp` counted the gate once per expert; count it once instead.\n",
    "total_params = attention_qkvo + vocab_embedding + router_gate_params + num_experts * expert_mlp_params\n",
    "active_params = attention_qkvo + vocab_embedding + router_gate_params + num_experts_per_tok * expert_mlp_params"
   ],
   "metadata": {
    "id": "xGiXwKYBE13L",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041550155,
     "user_tz": 180,
     "elapsed": 86,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    }
   },
   "id": "xGiXwKYBE13L",
   "execution_count": 9,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "import io\n",
    "import pandas as pd\n",
    "\n",
    "# Summary table of parameter counts, built from inline CSV text.\n",
    "# The header is written without spaces after the commas so the columns are named\n",
    "# Name, Value and Notes (a space after a comma would become part of the column name).\n",
    "# fillna is chained rather than applied in place so re-running the cell is idempotent.\n",
    "params = pd.read_csv(\n",
    "    io.StringIO(\n",
    "        f\"\"\"\n",
    "Name,Value,Notes\n",
    "Attention (QKVO * layers),{attention_qkvo:.2e},\"QO - 2DNH, KV - 2DKH\"\n",
    "MLP (matmuls * layers),{mlp:.2e},gates + 3 DF\n",
    "Vocab embedding,{vocab_embedding:.2e},2 DV ( input & output embedding)\n",
    "One expert,{one_expert:.2e},Attention + MLP + Vocab embedding\n",
    "Total Params,{total_params:.2e},Attention + experts * MLP + Vocab embedding\n",
    "Active Params,{active_params:.2e},Attention + experts per token * MLP + Vocab embedding\n",
    "\"\"\"\n",
    "    )\n",
    ").fillna(\"\")"
   ],
   "metadata": {
    "id": "4Wpi4QCDD2v9",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041550156,
     "user_tz": 180,
     "elapsed": 3,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    }
   },
   "id": "4Wpi4QCDD2v9",
   "execution_count": 10,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "params"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 238
    },
    "id": "DqIV-9bvOH4-",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041550158,
     "user_tz": 180,
     "elapsed": 3,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    },
    "outputId": "08c371d9-fae4-4c34-f43f-ab9aef41c5a1"
   },
   "id": "DqIV-9bvOH4-",
   "execution_count": 11,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "                        Name         Value  \\\n",
       "0  Attention (QKVO * layers)  2.100000e+08   \n",
       "1     MLP (matmuls * layers)  8.810000e+08   \n",
       "2            Vocab embedding  2.620000e+08   \n",
       "3                 One expert  1.350000e+09   \n",
       "4               Total Params  7.520000e+09   \n",
       "5              Active Params  2.230000e+09   \n",
       "\n",
       "                                               Notes  \n",
       "0                               QO - 2DNH, KV - 2DKH  \n",
       "1                                       gates + 3 DF  \n",
       "2                   2 DV ( input & output embedding)  \n",
       "3                  Attention + MLP + Vocab embedding  \n",
       "4        Attention + experts * MLP + Vocab embedding  \n",
       "5  Attention + experts per token * MLP + Vocab em...  "
      ],
      "text/html": [
       "\n",
       "  <div id=\"df-e25e5401-34be-468c-a098-6b394cd8c977\" class=\"colab-df-container\">\n",
       "    <div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Value</th>\n",
       "      <th>Notes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Attention (QKVO * layers)</td>\n",
       "      <td>2.100000e+08</td>\n",
       "      <td>QO - 2DNH, KV - 2DKH</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>MLP (matmuls * layers)</td>\n",
       "      <td>8.810000e+08</td>\n",
       "      <td>gates + 3 DF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Vocab embedding</td>\n",
       "      <td>2.620000e+08</td>\n",
       "      <td>2 DV ( input &amp; output embedding)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>One expert</td>\n",
       "      <td>1.350000e+09</td>\n",
       "      <td>Attention + MLP + Vocab embedding</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Total Params</td>\n",
       "      <td>7.520000e+09</td>\n",
       "      <td>Attention + experts * MLP + Vocab embedding</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Active Params</td>\n",
       "      <td>2.230000e+09</td>\n",
       "      <td>Attention + experts per token * MLP + Vocab em...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>\n",
       "    <div class=\"colab-df-buttons\">\n",
       "\n",
       "  <div class=\"colab-df-container\">\n",
       "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-e25e5401-34be-468c-a098-6b394cd8c977')\"\n",
       "            title=\"Convert this dataframe to an interactive table.\"\n",
       "            style=\"display:none;\">\n",
       "\n",
       "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
       "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
       "  </svg>\n",
       "    </button>\n",
       "\n",
       "  <style>\n",
       "    .colab-df-container {\n",
       "      display:flex;\n",
       "      gap: 12px;\n",
       "    }\n",
       "\n",
       "    .colab-df-convert {\n",
       "      background-color: #E8F0FE;\n",
       "      border: none;\n",
       "      border-radius: 50%;\n",
       "      cursor: pointer;\n",
       "      display: none;\n",
       "      fill: #1967D2;\n",
       "      height: 32px;\n",
       "      padding: 0 0 0 0;\n",
       "      width: 32px;\n",
       "    }\n",
       "\n",
       "    .colab-df-convert:hover {\n",
       "      background-color: #E2EBFA;\n",
       "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
       "      fill: #174EA6;\n",
       "    }\n",
       "\n",
       "    .colab-df-buttons div {\n",
       "      margin-bottom: 4px;\n",
       "    }\n",
       "\n",
       "    [theme=dark] .colab-df-convert {\n",
       "      background-color: #3B4455;\n",
       "      fill: #D2E3FC;\n",
       "    }\n",
       "\n",
       "    [theme=dark] .colab-df-convert:hover {\n",
       "      background-color: #434B5C;\n",
       "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
       "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
       "      fill: #FFFFFF;\n",
       "    }\n",
       "  </style>\n",
       "\n",
       "    <script>\n",
       "      const buttonEl =\n",
       "        document.querySelector('#df-e25e5401-34be-468c-a098-6b394cd8c977 button.colab-df-convert');\n",
       "      buttonEl.style.display =\n",
       "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
       "\n",
       "      async function convertToInteractive(key) {\n",
       "        const element = document.querySelector('#df-e25e5401-34be-468c-a098-6b394cd8c977');\n",
       "        const dataTable =\n",
       "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
       "                                                    [key], {});\n",
       "        if (!dataTable) return;\n",
       "\n",
       "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
       "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
       "          + ' to learn more about interactive tables.';\n",
       "        element.innerHTML = '';\n",
       "        dataTable['output_type'] = 'display_data';\n",
       "        await google.colab.output.renderOutput(dataTable, element);\n",
       "        const docLink = document.createElement('div');\n",
       "        docLink.innerHTML = docLinkHtml;\n",
       "        element.appendChild(docLink);\n",
       "      }\n",
       "    </script>\n",
       "  </div>\n",
       "\n",
       "\n",
       "    <div id=\"df-73617401-db58-42b2-ac21-9b98f1a5b0b4\">\n",
       "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-73617401-db58-42b2-ac21-9b98f1a5b0b4')\"\n",
       "                title=\"Suggest charts\"\n",
       "                style=\"display:none;\">\n",
       "\n",
       "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
       "     width=\"24px\">\n",
       "    <g>\n",
       "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
       "    </g>\n",
       "</svg>\n",
       "      </button>\n",
       "\n",
       "<style>\n",
       "  .colab-df-quickchart {\n",
       "      --bg-color: #E8F0FE;\n",
       "      --fill-color: #1967D2;\n",
       "      --hover-bg-color: #E2EBFA;\n",
       "      --hover-fill-color: #174EA6;\n",
       "      --disabled-fill-color: #AAA;\n",
       "      --disabled-bg-color: #DDD;\n",
       "  }\n",
       "\n",
       "  [theme=dark] .colab-df-quickchart {\n",
       "      --bg-color: #3B4455;\n",
       "      --fill-color: #D2E3FC;\n",
       "      --hover-bg-color: #434B5C;\n",
       "      --hover-fill-color: #FFFFFF;\n",
       "      --disabled-bg-color: #3B4455;\n",
       "      --disabled-fill-color: #666;\n",
       "  }\n",
       "\n",
       "  .colab-df-quickchart {\n",
       "    background-color: var(--bg-color);\n",
       "    border: none;\n",
       "    border-radius: 50%;\n",
       "    cursor: pointer;\n",
       "    display: none;\n",
       "    fill: var(--fill-color);\n",
       "    height: 32px;\n",
       "    padding: 0;\n",
       "    width: 32px;\n",
       "  }\n",
       "\n",
       "  .colab-df-quickchart:hover {\n",
       "    background-color: var(--hover-bg-color);\n",
       "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
       "    fill: var(--button-hover-fill-color);\n",
       "  }\n",
       "\n",
       "  .colab-df-quickchart-complete:disabled,\n",
       "  .colab-df-quickchart-complete:disabled:hover {\n",
       "    background-color: var(--disabled-bg-color);\n",
       "    fill: var(--disabled-fill-color);\n",
       "    box-shadow: none;\n",
       "  }\n",
       "\n",
       "  .colab-df-spinner {\n",
       "    border: 2px solid var(--fill-color);\n",
       "    border-color: transparent;\n",
       "    border-bottom-color: var(--fill-color);\n",
       "    animation:\n",
       "      spin 1s steps(1) infinite;\n",
       "  }\n",
       "\n",
       "  @keyframes spin {\n",
       "    0% {\n",
       "      border-color: transparent;\n",
       "      border-bottom-color: var(--fill-color);\n",
       "      border-left-color: var(--fill-color);\n",
       "    }\n",
       "    20% {\n",
       "      border-color: transparent;\n",
       "      border-left-color: var(--fill-color);\n",
       "      border-top-color: var(--fill-color);\n",
       "    }\n",
       "    30% {\n",
       "      border-color: transparent;\n",
       "      border-left-color: var(--fill-color);\n",
       "      border-top-color: var(--fill-color);\n",
       "      border-right-color: var(--fill-color);\n",
       "    }\n",
       "    40% {\n",
       "      border-color: transparent;\n",
       "      border-right-color: var(--fill-color);\n",
       "      border-top-color: var(--fill-color);\n",
       "    }\n",
       "    60% {\n",
       "      border-color: transparent;\n",
       "      border-right-color: var(--fill-color);\n",
       "    }\n",
       "    80% {\n",
       "      border-color: transparent;\n",
       "      border-right-color: var(--fill-color);\n",
       "      border-bottom-color: var(--fill-color);\n",
       "    }\n",
       "    90% {\n",
       "      border-color: transparent;\n",
       "      border-bottom-color: var(--fill-color);\n",
       "    }\n",
       "  }\n",
       "</style>\n",
       "\n",
       "      <script>\n",
       "        async function quickchart(key) {\n",
       "          const quickchartButtonEl =\n",
       "            document.querySelector('#' + key + ' button');\n",
       "          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
       "          quickchartButtonEl.classList.add('colab-df-spinner');\n",
       "          try {\n",
       "            const charts = await google.colab.kernel.invokeFunction(\n",
       "                'suggestCharts', [key], {});\n",
       "          } catch (error) {\n",
       "            console.error('Error during call to suggestCharts:', error);\n",
       "          }\n",
       "          quickchartButtonEl.classList.remove('colab-df-spinner');\n",
       "          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
       "        }\n",
       "        (() => {\n",
       "          let quickchartButtonEl =\n",
       "            document.querySelector('#df-73617401-db58-42b2-ac21-9b98f1a5b0b4 button');\n",
       "          quickchartButtonEl.style.display =\n",
       "            google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
       "        })();\n",
       "      </script>\n",
       "    </div>\n",
       "\n",
       "  <div id=\"id_259ae43e-c242-4f17-9b93-68353c710f67\">\n",
       "    <style>\n",
       "      .colab-df-generate {\n",
       "        background-color: #E8F0FE;\n",
       "        border: none;\n",
       "        border-radius: 50%;\n",
       "        cursor: pointer;\n",
       "        display: none;\n",
       "        fill: #1967D2;\n",
       "        height: 32px;\n",
       "        padding: 0 0 0 0;\n",
       "        width: 32px;\n",
       "      }\n",
       "\n",
       "      .colab-df-generate:hover {\n",
       "        background-color: #E2EBFA;\n",
       "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
       "        fill: #174EA6;\n",
       "      }\n",
       "\n",
       "      [theme=dark] .colab-df-generate {\n",
       "        background-color: #3B4455;\n",
       "        fill: #D2E3FC;\n",
       "      }\n",
       "\n",
       "      [theme=dark] .colab-df-generate:hover {\n",
       "        background-color: #434B5C;\n",
       "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
       "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
       "        fill: #FFFFFF;\n",
       "      }\n",
       "    </style>\n",
       "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('params')\"\n",
       "            title=\"Generate code using this dataframe.\"\n",
       "            style=\"display:none;\">\n",
       "\n",
       "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
       "       width=\"24px\">\n",
       "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
       "  </svg>\n",
       "    </button>\n",
       "    <script>\n",
       "      (() => {\n",
       "      const buttonEl =\n",
       "        document.querySelector('#id_259ae43e-c242-4f17-9b93-68353c710f67 button.colab-df-generate');\n",
       "      buttonEl.style.display =\n",
       "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
       "\n",
       "      buttonEl.onclick = () => {\n",
       "        google.colab.notebook.generateWithVariable('params');\n",
       "      }\n",
       "      })();\n",
       "    </script>\n",
       "  </div>\n",
       "\n",
       "    </div>\n",
       "  </div>\n"
      ],
      "application/vnd.google.colaboratory.intrinsic+json": {
       "type": "dataframe",
       "variable_name": "params",
       "summary": "{\n  \"name\": \"params\",\n  \"rows\": 6,\n  \"fields\": [\n    {\n      \"column\": \"Name\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"Attention (QKVO * layers)\",\n          \"MLP (matmuls * layers)\",\n          \"Active Params\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \" Value\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 2770866416.845099,\n        \"min\": 210000000.0,\n        \"max\": 7520000000.0,\n        \"num_unique_values\": 6,\n        \"samples\": [\n          210000000.0,\n          881000000.0,\n          2230000000.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \" Notes\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"QO - 2DNH, KV - 2DKH\",\n          \"gates + 3 DF\",\n          \"Attention + experts per token * MLP + Vocab embedding\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
      }
     },
     "metadata": {},
     "execution_count": 11
    }
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "FLOPs:"
   ],
   "metadata": {
    "id": "QiORk0bALLgF"
   },
   "id": "QiORk0bALLgF"
  },
  {
   "cell_type": "code",
   "source": [
    "# Forward/backward FLOPs for one step. Symbols as in the table notes: B=batch_per_device,\n",
    "# S=max_target_length, D=emb_dim, N=num_query_heads, K=num_kv_heads, H=head_dim,\n",
    "# F=mlp_dim, V=vocab_size.\n",
    "\n",
    "# QKVO matmuls with weights: 4BSDNH + 4BSDKH per layer, with the common 4BSDH factored out.\n",
    "forward_attention_weights = (\n",
    "    num_dense_layers * 2 * 2 * batch_per_device * max_target_length * emb_dim * head_dim\n",
    "    * (num_query_heads + num_kv_heads)\n",
    ")\n",
    "\n",
    "# The two dot products inside attention (softmax ignored): 2 * 2BSSNH per layer.\n",
    "forward_attention_o_weights = (\n",
    "    num_dense_layers * 2 * 2 * batch_per_device * max_target_length**2 * num_query_heads * head_dim\n",
    ")\n",
    "\n",
    "# Gate (2BSD * num_experts) plus 3 * 2BSDF for each expert used per token, with 2BSD factored out.\n",
    "forward_mlp = (\n",
    "    num_dense_layers * 2 * batch_per_device * max_target_length * emb_dim\n",
    "    * (num_experts + 3 * mlp_dim * num_experts_per_tok)\n",
    ")\n",
    "\n",
    "# 2BSDV\n",
    "forward_vocab_embedding = 2 * batch_per_device * max_target_length * emb_dim * vocab_size\n",
    "\n",
    "total_forward = sum(\n",
    "    [forward_attention_weights, forward_attention_o_weights, forward_mlp, forward_vocab_embedding]\n",
    ")\n",
    "# Backward pass is taken as twice the forward pass.\n",
    "total_backward = 2 * total_forward\n",
    "total = total_backward + total_forward"
   ],
   "metadata": {
    "id": "VVP2ko6ZFV0K",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041550159,
     "user_tz": 180,
     "elapsed": 1,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    }
   },
   "id": "VVP2ko6ZFV0K",
   "execution_count": 12,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "# Build the FLOPs breakdown table: one row per component as name, value, note.\n",
    "flops = pd.read_csv(\n",
    "    io.StringIO(\n",
    "        f\"\"\"\n",
    "Name, Value, Notes\n",
    "Forward Attention / weights,{forward_attention_weights:.2e},QKVO matmul with weights (4BSDNH + 4BSDKH)\n",
    "Forward Attention /o weights,{forward_attention_o_weights:.2e},2 Dot product in attention (softmax ignored)\n",
    "Forward MLP,{forward_mlp:.2e},gate + 3 * 2BSDF\n",
    "Forward Vocab embedding,{forward_vocab_embedding:.2e},2BSDV\n",
    "Total forward,{total_forward:.2e},\n",
    "Total backward,{total_backward:.2e},\n",
    "Total,{total:.2e},\n",
    "\"\"\"\n",
    "    ),\n",
    "    # The header row has a space after each comma; without this option the\n",
    "    # columns end up named ' Value' and ' Notes' (leading space included).\n",
    "    skipinitialspace=True,\n",
    ")\n",
    "# The total rows carry no note and parse as NaN; show them as blank cells.\n",
    "# Reassigning instead of inplace=True keeps the cell free of in-place mutation.\n",
    "flops = flops.fillna(\"\")"
   ],
   "metadata": {
    "id": "mgIcfQBUEYI_",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041550166,
     "user_tz": 180,
     "elapsed": 7,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    }
   },
   "id": "mgIcfQBUEYI_",
   "execution_count": 13,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "flops"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 269
    },
    "id": "v82nZipKR6tF",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041550232,
     "user_tz": 180,
     "elapsed": 65,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    },
    "outputId": "e4e211ce-6774-4c86-f730-1e46190a2281"
   },
   "id": "v82nZipKR6tF",
   "execution_count": 14,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "                           Name         Value  \\\n",
       "0   Forward Attention / weights  1.720000e+12   \n",
       "1  Forward Attention /o weights  1.370000e+12   \n",
       "2                   Forward MLP  1.440000e+13   \n",
       "3       Forward Vocab embedding  1.070000e+12   \n",
       "4                 Total forward  1.860000e+13   \n",
       "5                Total backward  3.720000e+13   \n",
       "6                         Total  5.580000e+13   \n",
       "\n",
       "                                          Notes  \n",
       "0    QKVO matmul with weights (4BSDNH + 4BSDKH)  \n",
       "1  2 Dot product in attention (softmax ignored)  \n",
       "2                              gate + 3 * 2BSDF  \n",
       "3                                         2BSDV  \n",
       "4                                                \n",
       "5                                                \n",
       "6                                                "
      ],
      "text/html": [
       "\n",
       "  <div id=\"df-15a30764-f394-4f59-99b2-206e8a672a54\" class=\"colab-df-container\">\n",
       "    <div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Value</th>\n",
       "      <th>Notes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Forward Attention / weights</td>\n",
       "      <td>1.720000e+12</td>\n",
       "      <td>QKVO matmul with weights (4BSDNH + 4BSDKH)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Forward Attention /o weights</td>\n",
       "      <td>1.370000e+12</td>\n",
       "      <td>2 Dot product in attention (softmax ignored)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Forward MLP</td>\n",
       "      <td>1.440000e+13</td>\n",
       "      <td>gate + 3 * 2BSDF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Forward Vocab embedding</td>\n",
       "      <td>1.070000e+12</td>\n",
       "      <td>2BSDV</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Total forward</td>\n",
       "      <td>1.860000e+13</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Total backward</td>\n",
       "      <td>3.720000e+13</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Total</td>\n",
       "      <td>5.580000e+13</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>\n",
       "    <div class=\"colab-df-buttons\">\n",
       "\n",
       "  <div class=\"colab-df-container\">\n",
       "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-15a30764-f394-4f59-99b2-206e8a672a54')\"\n",
       "            title=\"Convert this dataframe to an interactive table.\"\n",
       "            style=\"display:none;\">\n",
       "\n",
       "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
       "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
       "  </svg>\n",
       "    </button>\n",
       "\n",
       "  <style>\n",
       "    .colab-df-container {\n",
       "      display:flex;\n",
       "      gap: 12px;\n",
       "    }\n",
       "\n",
       "    .colab-df-convert {\n",
       "      background-color: #E8F0FE;\n",
       "      border: none;\n",
       "      border-radius: 50%;\n",
       "      cursor: pointer;\n",
       "      display: none;\n",
       "      fill: #1967D2;\n",
       "      height: 32px;\n",
       "      padding: 0 0 0 0;\n",
       "      width: 32px;\n",
       "    }\n",
       "\n",
       "    .colab-df-convert:hover {\n",
       "      background-color: #E2EBFA;\n",
       "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
       "      fill: #174EA6;\n",
       "    }\n",
       "\n",
       "    .colab-df-buttons div {\n",
       "      margin-bottom: 4px;\n",
       "    }\n",
       "\n",
       "    [theme=dark] .colab-df-convert {\n",
       "      background-color: #3B4455;\n",
       "      fill: #D2E3FC;\n",
       "    }\n",
       "\n",
       "    [theme=dark] .colab-df-convert:hover {\n",
       "      background-color: #434B5C;\n",
       "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
       "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
       "      fill: #FFFFFF;\n",
       "    }\n",
       "  </style>\n",
       "\n",
       "    <script>\n",
       "      const buttonEl =\n",
       "        document.querySelector('#df-15a30764-f394-4f59-99b2-206e8a672a54 button.colab-df-convert');\n",
       "      buttonEl.style.display =\n",
       "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
       "\n",
       "      async function convertToInteractive(key) {\n",
       "        const element = document.querySelector('#df-15a30764-f394-4f59-99b2-206e8a672a54');\n",
       "        const dataTable =\n",
       "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
       "                                                    [key], {});\n",
       "        if (!dataTable) return;\n",
       "\n",
       "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
       "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
       "          + ' to learn more about interactive tables.';\n",
       "        element.innerHTML = '';\n",
       "        dataTable['output_type'] = 'display_data';\n",
       "        await google.colab.output.renderOutput(dataTable, element);\n",
       "        const docLink = document.createElement('div');\n",
       "        docLink.innerHTML = docLinkHtml;\n",
       "        element.appendChild(docLink);\n",
       "      }\n",
       "    </script>\n",
       "  </div>\n",
       "\n",
       "\n",
       "    <div id=\"df-010013e8-3f35-48ae-8dbf-d5b3e2d509e5\">\n",
       "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-010013e8-3f35-48ae-8dbf-d5b3e2d509e5')\"\n",
       "                title=\"Suggest charts\"\n",
       "                style=\"display:none;\">\n",
       "\n",
       "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
       "     width=\"24px\">\n",
       "    <g>\n",
       "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
       "    </g>\n",
       "</svg>\n",
       "      </button>\n",
       "\n",
       "<style>\n",
       "  .colab-df-quickchart {\n",
       "      --bg-color: #E8F0FE;\n",
       "      --fill-color: #1967D2;\n",
       "      --hover-bg-color: #E2EBFA;\n",
       "      --hover-fill-color: #174EA6;\n",
       "      --disabled-fill-color: #AAA;\n",
       "      --disabled-bg-color: #DDD;\n",
       "  }\n",
       "\n",
       "  [theme=dark] .colab-df-quickchart {\n",
       "      --bg-color: #3B4455;\n",
       "      --fill-color: #D2E3FC;\n",
       "      --hover-bg-color: #434B5C;\n",
       "      --hover-fill-color: #FFFFFF;\n",
       "      --disabled-bg-color: #3B4455;\n",
       "      --disabled-fill-color: #666;\n",
       "  }\n",
       "\n",
       "  .colab-df-quickchart {\n",
       "    background-color: var(--bg-color);\n",
       "    border: none;\n",
       "    border-radius: 50%;\n",
       "    cursor: pointer;\n",
       "    display: none;\n",
       "    fill: var(--fill-color);\n",
       "    height: 32px;\n",
       "    padding: 0;\n",
       "    width: 32px;\n",
       "  }\n",
       "\n",
       "  .colab-df-quickchart:hover {\n",
       "    background-color: var(--hover-bg-color);\n",
       "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
       "    fill: var(--button-hover-fill-color);\n",
       "  }\n",
       "\n",
       "  .colab-df-quickchart-complete:disabled,\n",
       "  .colab-df-quickchart-complete:disabled:hover {\n",
       "    background-color: var(--disabled-bg-color);\n",
       "    fill: var(--disabled-fill-color);\n",
       "    box-shadow: none;\n",
       "  }\n",
       "\n",
       "  .colab-df-spinner {\n",
       "    border: 2px solid var(--fill-color);\n",
       "    border-color: transparent;\n",
       "    border-bottom-color: var(--fill-color);\n",
       "    animation:\n",
       "      spin 1s steps(1) infinite;\n",
       "  }\n",
       "\n",
       "  @keyframes spin {\n",
       "    0% {\n",
       "      border-color: transparent;\n",
       "      border-bottom-color: var(--fill-color);\n",
       "      border-left-color: var(--fill-color);\n",
       "    }\n",
       "    20% {\n",
       "      border-color: transparent;\n",
       "      border-left-color: var(--fill-color);\n",
       "      border-top-color: var(--fill-color);\n",
       "    }\n",
       "    30% {\n",
       "      border-color: transparent;\n",
       "      border-left-color: var(--fill-color);\n",
       "      border-top-color: var(--fill-color);\n",
       "      border-right-color: var(--fill-color);\n",
       "    }\n",
       "    40% {\n",
       "      border-color: transparent;\n",
       "      border-right-color: var(--fill-color);\n",
       "      border-top-color: var(--fill-color);\n",
       "    }\n",
       "    60% {\n",
       "      border-color: transparent;\n",
       "      border-right-color: var(--fill-color);\n",
       "    }\n",
       "    80% {\n",
       "      border-color: transparent;\n",
       "      border-right-color: var(--fill-color);\n",
       "      border-bottom-color: var(--fill-color);\n",
       "    }\n",
       "    90% {\n",
       "      border-color: transparent;\n",
       "      border-bottom-color: var(--fill-color);\n",
       "    }\n",
       "  }\n",
       "</style>\n",
       "\n",
       "      <script>\n",
       "        async function quickchart(key) {\n",
       "          const quickchartButtonEl =\n",
       "            document.querySelector('#' + key + ' button');\n",
       "          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
       "          quickchartButtonEl.classList.add('colab-df-spinner');\n",
       "          try {\n",
       "            const charts = await google.colab.kernel.invokeFunction(\n",
       "                'suggestCharts', [key], {});\n",
       "          } catch (error) {\n",
       "            console.error('Error during call to suggestCharts:', error);\n",
       "          }\n",
       "          quickchartButtonEl.classList.remove('colab-df-spinner');\n",
       "          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
       "        }\n",
       "        (() => {\n",
       "          let quickchartButtonEl =\n",
       "            document.querySelector('#df-010013e8-3f35-48ae-8dbf-d5b3e2d509e5 button');\n",
       "          quickchartButtonEl.style.display =\n",
       "            google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
       "        })();\n",
       "      </script>\n",
       "    </div>\n",
       "\n",
       "  <div id=\"id_c5feadf0-a7c5-4351-b79e-863cea19c673\">\n",
       "    <style>\n",
       "      .colab-df-generate {\n",
       "        background-color: #E8F0FE;\n",
       "        border: none;\n",
       "        border-radius: 50%;\n",
       "        cursor: pointer;\n",
       "        display: none;\n",
       "        fill: #1967D2;\n",
       "        height: 32px;\n",
       "        padding: 0 0 0 0;\n",
       "        width: 32px;\n",
       "      }\n",
       "\n",
       "      .colab-df-generate:hover {\n",
       "        background-color: #E2EBFA;\n",
       "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
       "        fill: #174EA6;\n",
       "      }\n",
       "\n",
       "      [theme=dark] .colab-df-generate {\n",
       "        background-color: #3B4455;\n",
       "        fill: #D2E3FC;\n",
       "      }\n",
       "\n",
       "      [theme=dark] .colab-df-generate:hover {\n",
       "        background-color: #434B5C;\n",
       "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
       "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
       "        fill: #FFFFFF;\n",
       "      }\n",
       "    </style>\n",
       "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('flops')\"\n",
       "            title=\"Generate code using this dataframe.\"\n",
       "            style=\"display:none;\">\n",
       "\n",
       "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
       "       width=\"24px\">\n",
       "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
       "  </svg>\n",
       "    </button>\n",
       "    <script>\n",
       "      (() => {\n",
       "      const buttonEl =\n",
       "        document.querySelector('#id_c5feadf0-a7c5-4351-b79e-863cea19c673 button.colab-df-generate');\n",
       "      buttonEl.style.display =\n",
       "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
       "\n",
       "      buttonEl.onclick = () => {\n",
       "        google.colab.notebook.generateWithVariable('flops');\n",
       "      }\n",
       "      })();\n",
       "    </script>\n",
       "  </div>\n",
       "\n",
       "    </div>\n",
       "  </div>\n"
      ],
      "application/vnd.google.colaboratory.intrinsic+json": {
       "type": "dataframe",
       "variable_name": "flops",
       "summary": "{\n  \"name\": \"flops\",\n  \"rows\": 7,\n  \"fields\": [\n    {\n      \"column\": \"Name\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 7,\n        \"samples\": [\n          \"Forward Attention / weights\",\n          \"Forward Attention /o weights\",\n          \"Total backward\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \" Value\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 20962458393632.22,\n        \"min\": 1070000000000.0,\n        \"max\": 55800000000000.0,\n        \"num_unique_values\": 7,\n        \"samples\": [\n          1720000000000.0,\n          1370000000000.0,\n          37200000000000.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \" Notes\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 5,\n        \"samples\": [\n          \"2 Dot product in attention (softmax ignored)\",\n          \"\",\n          \"gate + 3 * 2BSDF\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
      }
     },
     "metadata": {},
     "execution_count": 14
    }
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Dense models"
   ],
   "metadata": {
    "id": "kOhEU-XtD1gc"
   },
   "id": "kOhEU-XtD1gc"
  },
  {
   "cell_type": "markdown",
   "source": [
    "Enter your component values here:"
   ],
   "metadata": {
    "id": "vmM42gEfLuPP"
   },
   "id": "vmM42gEfLuPP"
  },
  {
   "cell_type": "code",
   "source": [
    "# Inputs for the dense-model calculator. The letters are the symbols used in\n",
    "# the Notes columns of the result tables.\n",
    "batch_per_device = 1  # B (appears in the FLOPs notes)\n",
    "num_dense_layers = 32  # multiplier on the attention and MLP terms\n",
    "max_target_length = 4096  # S (appears in the FLOPs notes)\n",
    "vocab_size = 32000  # V\n",
    "emb_dim = 4096  # D\n",
    "mlp_dim = 14336  # F\n",
    "head_dim = 128  # H\n",
    "num_query_heads = 32  # N\n",
    "num_kv_heads = 8  # K"
   ],
   "metadata": {
    "id": "FySa1W6nKxki",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041550235,
     "user_tz": 180,
     "elapsed": 1,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    }
   },
   "id": "FySa1W6nKxki",
   "execution_count": 15,
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "source": [
    "Params:"
   ],
   "metadata": {
    "id": "uqcJk-DRLxQD"
   },
   "id": "uqcJk-DRLxQD"
  },
  {
   "cell_type": "code",
   "source": [
    "# Attention weights per layer: 2DNH for Q and O plus 2DKH for K and V.\n",
    "attention_qkvo = (\n",
    "    2 * num_dense_layers * emb_dim * head_dim\n",
    "    * (num_query_heads + num_kv_heads)\n",
    ")\n",
    "# MLP weights per layer: 3DF.\n",
    "mlp = 3 * num_dense_layers * emb_dim * mlp_dim\n",
    "# 2DV: input and output embeddings are counted separately.\n",
    "vocab_embedding = vocab_size * emb_dim * 2\n",
    "\n",
    "total_params = sum((attention_qkvo, vocab_embedding, mlp))"
   ],
   "metadata": {
    "id": "-4L1iNokK95V",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041550236,
     "user_tz": 180,
     "elapsed": 1,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    }
   },
   "id": "-4L1iNokK95V",
   "execution_count": 16,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "import io\n",
    "import pandas as pd\n",
    "\n",
    "# Build the parameter-count table: one row per component as name, value, note.\n",
    "params = pd.read_csv(\n",
    "    io.StringIO(\n",
    "        f\"\"\"\n",
    "Name, Value, Notes\n",
    "Attention (QKVO * layers),{attention_qkvo:.2e},\"QO - 2DNH, KV - 2DKH\"\n",
    "MLP (matmuls * layers),{mlp:.2e},3 DF\n",
    "Vocab embedding,{vocab_embedding:.2e},2 DV ( input & output embedding)\n",
    "Total Params,{total_params:.2e},Attention + MLP + Vocab embedding\n",
    "\"\"\"\n",
    "    ),\n",
    "    # The header row has a space after each comma; without this option the\n",
    "    # columns end up named ' Value' and ' Notes' (leading space included).\n",
    "    # The comma inside the quoted note is not a delimiter, so that note is unchanged.\n",
    "    skipinitialspace=True,\n",
    ")\n",
    "# Replace any missing cells with empty strings; reassigning instead of\n",
    "# inplace=True keeps the cell free of in-place mutation.\n",
    "params = params.fillna(\"\")"
   ],
   "metadata": {
    "id": "wTkkrjjpVCen",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041550294,
     "user_tz": 180,
     "elapsed": 58,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    }
   },
   "id": "wTkkrjjpVCen",
   "execution_count": 17,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "params"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 175
    },
    "id": "LMGQ2LU7VRCQ",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041550309,
     "user_tz": 180,
     "elapsed": 14,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    },
    "outputId": "3cc3084d-af84-46b4-baf1-fcf3d82f1763"
   },
   "id": "LMGQ2LU7VRCQ",
   "execution_count": 18,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "                        Name         Value                              Notes\n",
       "0  Attention (QKVO * layers)  1.340000e+09               QO - 2DNH, KV - 2DKH\n",
       "1     MLP (matmuls * layers)  5.640000e+09                               3 DF\n",
       "2            Vocab embedding  2.620000e+08   2 DV ( input & output embedding)\n",
       "3               Total Params  7.240000e+09  Attention + MLP + Vocab embedding"
      ],
      "text/html": [
       "\n",
       "  <div id=\"df-79f5450e-9b4d-4d6e-aaea-8b08c01c9e05\" class=\"colab-df-container\">\n",
       "    <div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Value</th>\n",
       "      <th>Notes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Attention (QKVO * layers)</td>\n",
       "      <td>1.340000e+09</td>\n",
       "      <td>QO - 2DNH, KV - 2DKH</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>MLP (matmuls * layers)</td>\n",
       "      <td>5.640000e+09</td>\n",
       "      <td>3 DF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Vocab embedding</td>\n",
       "      <td>2.620000e+08</td>\n",
       "      <td>2 DV ( input &amp; output embedding)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Total Params</td>\n",
       "      <td>7.240000e+09</td>\n",
       "      <td>Attention + MLP + Vocab embedding</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>\n",
       "    <div class=\"colab-df-buttons\">\n",
       "\n",
       "  <div class=\"colab-df-container\">\n",
       "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-79f5450e-9b4d-4d6e-aaea-8b08c01c9e05')\"\n",
       "            title=\"Convert this dataframe to an interactive table.\"\n",
       "            style=\"display:none;\">\n",
       "\n",
       "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
       "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
       "  </svg>\n",
       "    </button>\n",
       "\n",
       "  <style>\n",
       "    .colab-df-container {\n",
       "      display:flex;\n",
       "      gap: 12px;\n",
       "    }\n",
       "\n",
       "    .colab-df-convert {\n",
       "      background-color: #E8F0FE;\n",
       "      border: none;\n",
       "      border-radius: 50%;\n",
       "      cursor: pointer;\n",
       "      display: none;\n",
       "      fill: #1967D2;\n",
       "      height: 32px;\n",
       "      padding: 0 0 0 0;\n",
       "      width: 32px;\n",
       "    }\n",
       "\n",
       "    .colab-df-convert:hover {\n",
       "      background-color: #E2EBFA;\n",
       "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
       "      fill: #174EA6;\n",
       "    }\n",
       "\n",
       "    .colab-df-buttons div {\n",
       "      margin-bottom: 4px;\n",
       "    }\n",
       "\n",
       "    [theme=dark] .colab-df-convert {\n",
       "      background-color: #3B4455;\n",
       "      fill: #D2E3FC;\n",
       "    }\n",
       "\n",
       "    [theme=dark] .colab-df-convert:hover {\n",
       "      background-color: #434B5C;\n",
       "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
       "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
       "      fill: #FFFFFF;\n",
       "    }\n",
       "  </style>\n",
       "\n",
       "    <script>\n",
       "      const buttonEl =\n",
       "        document.querySelector('#df-79f5450e-9b4d-4d6e-aaea-8b08c01c9e05 button.colab-df-convert');\n",
       "      buttonEl.style.display =\n",
       "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
       "\n",
       "      async function convertToInteractive(key) {\n",
       "        const element = document.querySelector('#df-79f5450e-9b4d-4d6e-aaea-8b08c01c9e05');\n",
       "        const dataTable =\n",
       "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
       "                                                    [key], {});\n",
       "        if (!dataTable) return;\n",
       "\n",
       "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
       "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
       "          + ' to learn more about interactive tables.';\n",
       "        element.innerHTML = '';\n",
       "        dataTable['output_type'] = 'display_data';\n",
       "        await google.colab.output.renderOutput(dataTable, element);\n",
       "        const docLink = document.createElement('div');\n",
       "        docLink.innerHTML = docLinkHtml;\n",
       "        element.appendChild(docLink);\n",
       "      }\n",
       "    </script>\n",
       "  </div>\n",
       "\n",
       "\n",
       "    <div id=\"df-d82feaf1-2f9a-4e92-8d72-f07e115f931a\">\n",
       "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-d82feaf1-2f9a-4e92-8d72-f07e115f931a')\"\n",
       "                title=\"Suggest charts\"\n",
       "                style=\"display:none;\">\n",
       "\n",
       "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
       "     width=\"24px\">\n",
       "    <g>\n",
       "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
       "    </g>\n",
       "</svg>\n",
       "      </button>\n",
       "\n",
       "<style>\n",
       "  .colab-df-quickchart {\n",
       "      --bg-color: #E8F0FE;\n",
       "      --fill-color: #1967D2;\n",
       "      --hover-bg-color: #E2EBFA;\n",
       "      --hover-fill-color: #174EA6;\n",
       "      --disabled-fill-color: #AAA;\n",
       "      --disabled-bg-color: #DDD;\n",
       "  }\n",
       "\n",
       "  [theme=dark] .colab-df-quickchart {\n",
       "      --bg-color: #3B4455;\n",
       "      --fill-color: #D2E3FC;\n",
       "      --hover-bg-color: #434B5C;\n",
       "      --hover-fill-color: #FFFFFF;\n",
       "      --disabled-bg-color: #3B4455;\n",
       "      --disabled-fill-color: #666;\n",
       "  }\n",
       "\n",
       "  .colab-df-quickchart {\n",
       "    background-color: var(--bg-color);\n",
       "    border: none;\n",
       "    border-radius: 50%;\n",
       "    cursor: pointer;\n",
       "    display: none;\n",
       "    fill: var(--fill-color);\n",
       "    height: 32px;\n",
       "    padding: 0;\n",
       "    width: 32px;\n",
       "  }\n",
       "\n",
       "  .colab-df-quickchart:hover {\n",
       "    background-color: var(--hover-bg-color);\n",
       "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
       "    fill: var(--button-hover-fill-color);\n",
       "  }\n",
       "\n",
       "  .colab-df-quickchart-complete:disabled,\n",
       "  .colab-df-quickchart-complete:disabled:hover {\n",
       "    background-color: var(--disabled-bg-color);\n",
       "    fill: var(--disabled-fill-color);\n",
       "    box-shadow: none;\n",
       "  }\n",
       "\n",
       "  .colab-df-spinner {\n",
       "    border: 2px solid var(--fill-color);\n",
       "    border-color: transparent;\n",
       "    border-bottom-color: var(--fill-color);\n",
       "    animation:\n",
       "      spin 1s steps(1) infinite;\n",
       "  }\n",
       "\n",
       "  @keyframes spin {\n",
       "    0% {\n",
       "      border-color: transparent;\n",
       "      border-bottom-color: var(--fill-color);\n",
       "      border-left-color: var(--fill-color);\n",
       "    }\n",
       "    20% {\n",
       "      border-color: transparent;\n",
       "      border-left-color: var(--fill-color);\n",
       "      border-top-color: var(--fill-color);\n",
       "    }\n",
       "    30% {\n",
       "      border-color: transparent;\n",
       "      border-left-color: var(--fill-color);\n",
       "      border-top-color: var(--fill-color);\n",
       "      border-right-color: var(--fill-color);\n",
       "    }\n",
       "    40% {\n",
       "      border-color: transparent;\n",
       "      border-right-color: var(--fill-color);\n",
       "      border-top-color: var(--fill-color);\n",
       "    }\n",
       "    60% {\n",
       "      border-color: transparent;\n",
       "      border-right-color: var(--fill-color);\n",
       "    }\n",
       "    80% {\n",
       "      border-color: transparent;\n",
       "      border-right-color: var(--fill-color);\n",
       "      border-bottom-color: var(--fill-color);\n",
       "    }\n",
       "    90% {\n",
       "      border-color: transparent;\n",
       "      border-bottom-color: var(--fill-color);\n",
       "    }\n",
       "  }\n",
       "</style>\n",
       "\n",
       "      <script>\n",
       "        async function quickchart(key) {\n",
       "          const quickchartButtonEl =\n",
       "            document.querySelector('#' + key + ' button');\n",
       "          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
       "          quickchartButtonEl.classList.add('colab-df-spinner');\n",
       "          try {\n",
       "            const charts = await google.colab.kernel.invokeFunction(\n",
       "                'suggestCharts', [key], {});\n",
       "          } catch (error) {\n",
       "            console.error('Error during call to suggestCharts:', error);\n",
       "          }\n",
       "          quickchartButtonEl.classList.remove('colab-df-spinner');\n",
       "          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
       "        }\n",
       "        (() => {\n",
       "          let quickchartButtonEl =\n",
       "            document.querySelector('#df-d82feaf1-2f9a-4e92-8d72-f07e115f931a button');\n",
       "          quickchartButtonEl.style.display =\n",
       "            google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
       "        })();\n",
       "      </script>\n",
       "    </div>\n",
       "\n",
       "  <div id=\"id_b5fb92c4-49f3-4a09-9cfb-e192de4a8cab\">\n",
       "    <style>\n",
       "      .colab-df-generate {\n",
       "        background-color: #E8F0FE;\n",
       "        border: none;\n",
       "        border-radius: 50%;\n",
       "        cursor: pointer;\n",
       "        display: none;\n",
       "        fill: #1967D2;\n",
       "        height: 32px;\n",
       "        padding: 0 0 0 0;\n",
       "        width: 32px;\n",
       "      }\n",
       "\n",
       "      .colab-df-generate:hover {\n",
       "        background-color: #E2EBFA;\n",
       "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
       "        fill: #174EA6;\n",
       "      }\n",
       "\n",
       "      [theme=dark] .colab-df-generate {\n",
       "        background-color: #3B4455;\n",
       "        fill: #D2E3FC;\n",
       "      }\n",
       "\n",
       "      [theme=dark] .colab-df-generate:hover {\n",
       "        background-color: #434B5C;\n",
       "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
       "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
       "        fill: #FFFFFF;\n",
       "      }\n",
       "    </style>\n",
       "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('params')\"\n",
       "            title=\"Generate code using this dataframe.\"\n",
       "            style=\"display:none;\">\n",
       "\n",
       "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
       "       width=\"24px\">\n",
       "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
       "  </svg>\n",
       "    </button>\n",
       "    <script>\n",
       "      (() => {\n",
       "      const buttonEl =\n",
       "        document.querySelector('#id_b5fb92c4-49f3-4a09-9cfb-e192de4a8cab button.colab-df-generate');\n",
       "      buttonEl.style.display =\n",
       "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
       "\n",
       "      buttonEl.onclick = () => {\n",
       "        google.colab.notebook.generateWithVariable('params');\n",
       "      }\n",
       "      })();\n",
       "    </script>\n",
       "  </div>\n",
       "\n",
       "    </div>\n",
       "  </div>\n"
      ],
      "application/vnd.google.colaboratory.intrinsic+json": {
       "type": "dataframe",
       "variable_name": "params",
       "summary": "{\n  \"name\": \"params\",\n  \"rows\": 4,\n  \"fields\": [\n    {\n      \"column\": \"Name\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          \"MLP (matmuls * layers)\",\n          \"Total Params\",\n          \"Attention (QKVO * layers)\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \" Value\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 3349595149.6661005,\n        \"min\": 262000000.0,\n        \"max\": 7240000000.0,\n        \"num_unique_values\": 4,\n        \"samples\": [\n          5640000000.0,\n          7240000000.0,\n          1340000000.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \" Notes\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          \"3 DF\",\n          \"Attention + MLP + Vocab embedding\",\n          \"QO - 2DNH, KV - 2DKH\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
      }
     },
     "metadata": {},
     "execution_count": 18
    }
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "FLOPs:"
   ],
   "metadata": {
    "id": "DvOBmhcPLmMM"
   },
   "id": "DvOBmhcPLmMM"
  },
  {
   "cell_type": "code",
   "source": [
    "# Forward FLOPs of the projections that multiply by weights, over all layers:\n",
    "# Q/O use the query heads (2 * 2BSDNH), K/V use the KV heads (2 * 2BSDKH).\n",
    "forward_attention_weights = num_dense_layers * (\n",
    "    2 * 2 * batch_per_device * max_target_length * emb_dim * head_dim * (num_query_heads + num_kv_heads)\n",
    ")\n",
    "# The two weight-free dot products inside attention, each 2BSSNH per layer.\n",
    "forward_attention_o_weights = num_dense_layers * (\n",
    "    2 * 2 * batch_per_device * max_target_length**2 * num_query_heads * head_dim\n",
    ")\n",
    "# Three 2BSDF matmuls per layer.\n",
    "forward_mlp = num_dense_layers * 3 * (2 * batch_per_device * max_target_length * emb_dim * mlp_dim)\n",
    "# A single 2BSDV matmul, not multiplied by the layer count.\n",
    "forward_vocab_embedding = 2 * vocab_size * emb_dim * batch_per_device * max_target_length\n",
    "total_forward = sum(\n",
    "    (forward_attention_weights, forward_attention_o_weights, forward_mlp, forward_vocab_embedding)\n",
    ")\n",
    "# The backward pass is taken as twice the forward total.\n",
    "total_backward = total_forward * 2\n",
    "total = total_backward + total_forward"
   ],
   "metadata": {
    "id": "hvHTuhnuLnkh",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041550328,
     "user_tz": 180,
     "elapsed": 16,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    }
   },
   "id": "hvHTuhnuLnkh",
   "execution_count": 19,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "# Build the FLOPs summary table from inline CSV text.\n",
    "# skipinitialspace=True drops the blank that follows each comma in the header, so the\n",
    "# columns are named Value and Notes instead of carrying a leading space.\n",
    "flops = pd.read_csv(\n",
    "    io.StringIO(\n",
    "        f\"\"\"\n",
    "Name, Value, Notes\n",
    "Forward Attention / weights,{forward_attention_weights:.2e},QKVO matmul with weights (4BSDNH + 4BSDKH)\n",
    "Forward Attention /o weights,{forward_attention_o_weights:.2e},2 Dot product in attention (softmax ignored)\n",
    "Forward MLP,{forward_mlp:.2e},3 * 2BSDF\n",
    "Forward Vocab embedding,{forward_vocab_embedding:.2e},2BSDV\n",
    "Total forward,{total_forward:.2e},\n",
    "Total backward,{total_backward:.2e},\n",
    "Total,{total:.2e},\n",
    "\"\"\"\n",
    "    ),\n",
    "    skipinitialspace=True,\n",
    ").fillna(\"\")  # rows with no note parse as NaN; show them as empty strings"
   ],
   "metadata": {
    "id": "z-UzWpkuHkGW",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041550350,
     "user_tz": 180,
     "elapsed": 13,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    }
   },
   "id": "z-UzWpkuHkGW",
   "execution_count": 20,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "flops"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 269
    },
    "id": "Ss8_1Y1hK8Vl",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041550404,
     "user_tz": 180,
     "elapsed": 54,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    },
    "outputId": "f6404a5d-69d6-4724-fb42-ba9dc8dbaf64"
   },
   "id": "Ss8_1Y1hK8Vl",
   "execution_count": 21,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "                           Name         Value  \\\n",
       "0   Forward Attention / weights  1.100000e+13   \n",
       "1  Forward Attention /o weights  8.800000e+12   \n",
       "2                   Forward MLP  4.620000e+13   \n",
       "3       Forward Vocab embedding  1.070000e+12   \n",
       "4                 Total forward  6.700000e+13   \n",
       "5                Total backward  1.340000e+14   \n",
       "6                         Total  2.010000e+14   \n",
       "\n",
       "                                          Notes  \n",
       "0    QKVO matmul with weights (4BSDNH + 4BSDKH)  \n",
       "1  2 Dot product in attention (softmax ignored)  \n",
       "2                                     3 * 2BSDF  \n",
       "3                                         2BSDV  \n",
       "4                                                \n",
       "5                                                \n",
       "6                                                "
      ],
      "text/html": [
       "\n",
       "  <div id=\"df-763ad6a4-18f8-4cc0-9231-4857552aad2d\" class=\"colab-df-container\">\n",
       "    <div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Value</th>\n",
       "      <th>Notes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Forward Attention / weights</td>\n",
       "      <td>1.100000e+13</td>\n",
       "      <td>QKVO matmul with weights (4BSDNH + 4BSDKH)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Forward Attention /o weights</td>\n",
       "      <td>8.800000e+12</td>\n",
       "      <td>2 Dot product in attention (softmax ignored)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Forward MLP</td>\n",
       "      <td>4.620000e+13</td>\n",
       "      <td>3 * 2BSDF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Forward Vocab embedding</td>\n",
       "      <td>1.070000e+12</td>\n",
       "      <td>2BSDV</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Total forward</td>\n",
       "      <td>6.700000e+13</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Total backward</td>\n",
       "      <td>1.340000e+14</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Total</td>\n",
       "      <td>2.010000e+14</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>\n",
       "    <div class=\"colab-df-buttons\">\n",
       "\n",
       "  <div class=\"colab-df-container\">\n",
       "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-763ad6a4-18f8-4cc0-9231-4857552aad2d')\"\n",
       "            title=\"Convert this dataframe to an interactive table.\"\n",
       "            style=\"display:none;\">\n",
       "\n",
       "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
       "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
       "  </svg>\n",
       "    </button>\n",
       "\n",
       "  <style>\n",
       "    .colab-df-container {\n",
       "      display:flex;\n",
       "      gap: 12px;\n",
       "    }\n",
       "\n",
       "    .colab-df-convert {\n",
       "      background-color: #E8F0FE;\n",
       "      border: none;\n",
       "      border-radius: 50%;\n",
       "      cursor: pointer;\n",
       "      display: none;\n",
       "      fill: #1967D2;\n",
       "      height: 32px;\n",
       "      padding: 0 0 0 0;\n",
       "      width: 32px;\n",
       "    }\n",
       "\n",
       "    .colab-df-convert:hover {\n",
       "      background-color: #E2EBFA;\n",
       "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
       "      fill: #174EA6;\n",
       "    }\n",
       "\n",
       "    .colab-df-buttons div {\n",
       "      margin-bottom: 4px;\n",
       "    }\n",
       "\n",
       "    [theme=dark] .colab-df-convert {\n",
       "      background-color: #3B4455;\n",
       "      fill: #D2E3FC;\n",
       "    }\n",
       "\n",
       "    [theme=dark] .colab-df-convert:hover {\n",
       "      background-color: #434B5C;\n",
       "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
       "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
       "      fill: #FFFFFF;\n",
       "    }\n",
       "  </style>\n",
       "\n",
       "    <script>\n",
       "      const buttonEl =\n",
       "        document.querySelector('#df-763ad6a4-18f8-4cc0-9231-4857552aad2d button.colab-df-convert');\n",
       "      buttonEl.style.display =\n",
       "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
       "\n",
       "      async function convertToInteractive(key) {\n",
       "        const element = document.querySelector('#df-763ad6a4-18f8-4cc0-9231-4857552aad2d');\n",
       "        const dataTable =\n",
       "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
       "                                                    [key], {});\n",
       "        if (!dataTable) return;\n",
       "\n",
       "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
       "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
       "          + ' to learn more about interactive tables.';\n",
       "        element.innerHTML = '';\n",
       "        dataTable['output_type'] = 'display_data';\n",
       "        await google.colab.output.renderOutput(dataTable, element);\n",
       "        const docLink = document.createElement('div');\n",
       "        docLink.innerHTML = docLinkHtml;\n",
       "        element.appendChild(docLink);\n",
       "      }\n",
       "    </script>\n",
       "  </div>\n",
       "\n",
       "\n",
       "    <div id=\"df-c437f488-0ee6-48c7-ad86-0737141b9e66\">\n",
       "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-c437f488-0ee6-48c7-ad86-0737141b9e66')\"\n",
       "                title=\"Suggest charts\"\n",
       "                style=\"display:none;\">\n",
       "\n",
       "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
       "     width=\"24px\">\n",
       "    <g>\n",
       "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
       "    </g>\n",
       "</svg>\n",
       "      </button>\n",
       "\n",
       "<style>\n",
       "  .colab-df-quickchart {\n",
       "      --bg-color: #E8F0FE;\n",
       "      --fill-color: #1967D2;\n",
       "      --hover-bg-color: #E2EBFA;\n",
       "      --hover-fill-color: #174EA6;\n",
       "      --disabled-fill-color: #AAA;\n",
       "      --disabled-bg-color: #DDD;\n",
       "  }\n",
       "\n",
       "  [theme=dark] .colab-df-quickchart {\n",
       "      --bg-color: #3B4455;\n",
       "      --fill-color: #D2E3FC;\n",
       "      --hover-bg-color: #434B5C;\n",
       "      --hover-fill-color: #FFFFFF;\n",
       "      --disabled-bg-color: #3B4455;\n",
       "      --disabled-fill-color: #666;\n",
       "  }\n",
       "\n",
       "  .colab-df-quickchart {\n",
       "    background-color: var(--bg-color);\n",
       "    border: none;\n",
       "    border-radius: 50%;\n",
       "    cursor: pointer;\n",
       "    display: none;\n",
       "    fill: var(--fill-color);\n",
       "    height: 32px;\n",
       "    padding: 0;\n",
       "    width: 32px;\n",
       "  }\n",
       "\n",
       "  .colab-df-quickchart:hover {\n",
       "    background-color: var(--hover-bg-color);\n",
       "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
       "    fill: var(--button-hover-fill-color);\n",
       "  }\n",
       "\n",
       "  .colab-df-quickchart-complete:disabled,\n",
       "  .colab-df-quickchart-complete:disabled:hover {\n",
       "    background-color: var(--disabled-bg-color);\n",
       "    fill: var(--disabled-fill-color);\n",
       "    box-shadow: none;\n",
       "  }\n",
       "\n",
       "  .colab-df-spinner {\n",
       "    border: 2px solid var(--fill-color);\n",
       "    border-color: transparent;\n",
       "    border-bottom-color: var(--fill-color);\n",
       "    animation:\n",
       "      spin 1s steps(1) infinite;\n",
       "  }\n",
       "\n",
       "  @keyframes spin {\n",
       "    0% {\n",
       "      border-color: transparent;\n",
       "      border-bottom-color: var(--fill-color);\n",
       "      border-left-color: var(--fill-color);\n",
       "    }\n",
       "    20% {\n",
       "      border-color: transparent;\n",
       "      border-left-color: var(--fill-color);\n",
       "      border-top-color: var(--fill-color);\n",
       "    }\n",
       "    30% {\n",
       "      border-color: transparent;\n",
       "      border-left-color: var(--fill-color);\n",
       "      border-top-color: var(--fill-color);\n",
       "      border-right-color: var(--fill-color);\n",
       "    }\n",
       "    40% {\n",
       "      border-color: transparent;\n",
       "      border-right-color: var(--fill-color);\n",
       "      border-top-color: var(--fill-color);\n",
       "    }\n",
       "    60% {\n",
       "      border-color: transparent;\n",
       "      border-right-color: var(--fill-color);\n",
       "    }\n",
       "    80% {\n",
       "      border-color: transparent;\n",
       "      border-right-color: var(--fill-color);\n",
       "      border-bottom-color: var(--fill-color);\n",
       "    }\n",
       "    90% {\n",
       "      border-color: transparent;\n",
       "      border-bottom-color: var(--fill-color);\n",
       "    }\n",
       "  }\n",
       "</style>\n",
       "\n",
       "      <script>\n",
       "        async function quickchart(key) {\n",
       "          const quickchartButtonEl =\n",
       "            document.querySelector('#' + key + ' button');\n",
       "          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
       "          quickchartButtonEl.classList.add('colab-df-spinner');\n",
       "          try {\n",
       "            const charts = await google.colab.kernel.invokeFunction(\n",
       "                'suggestCharts', [key], {});\n",
       "          } catch (error) {\n",
       "            console.error('Error during call to suggestCharts:', error);\n",
       "          }\n",
       "          quickchartButtonEl.classList.remove('colab-df-spinner');\n",
       "          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
       "        }\n",
       "        (() => {\n",
       "          let quickchartButtonEl =\n",
       "            document.querySelector('#df-c437f488-0ee6-48c7-ad86-0737141b9e66 button');\n",
       "          quickchartButtonEl.style.display =\n",
       "            google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
       "        })();\n",
       "      </script>\n",
       "    </div>\n",
       "\n",
       "  <div id=\"id_d87270d6-f992-4c78-ac8e-073f41732976\">\n",
       "    <style>\n",
       "      .colab-df-generate {\n",
       "        background-color: #E8F0FE;\n",
       "        border: none;\n",
       "        border-radius: 50%;\n",
       "        cursor: pointer;\n",
       "        display: none;\n",
       "        fill: #1967D2;\n",
       "        height: 32px;\n",
       "        padding: 0 0 0 0;\n",
       "        width: 32px;\n",
       "      }\n",
       "\n",
       "      .colab-df-generate:hover {\n",
       "        background-color: #E2EBFA;\n",
       "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
       "        fill: #174EA6;\n",
       "      }\n",
       "\n",
       "      [theme=dark] .colab-df-generate {\n",
       "        background-color: #3B4455;\n",
       "        fill: #D2E3FC;\n",
       "      }\n",
       "\n",
       "      [theme=dark] .colab-df-generate:hover {\n",
       "        background-color: #434B5C;\n",
       "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
       "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
       "        fill: #FFFFFF;\n",
       "      }\n",
       "    </style>\n",
       "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('flops')\"\n",
       "            title=\"Generate code using this dataframe.\"\n",
       "            style=\"display:none;\">\n",
       "\n",
       "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
       "       width=\"24px\">\n",
       "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
       "  </svg>\n",
       "    </button>\n",
       "    <script>\n",
       "      (() => {\n",
       "      const buttonEl =\n",
       "        document.querySelector('#id_d87270d6-f992-4c78-ac8e-073f41732976 button.colab-df-generate');\n",
       "      buttonEl.style.display =\n",
       "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
       "\n",
       "      buttonEl.onclick = () => {\n",
       "        google.colab.notebook.generateWithVariable('flops');\n",
       "      }\n",
       "      })();\n",
       "    </script>\n",
       "  </div>\n",
       "\n",
       "    </div>\n",
       "  </div>\n"
      ],
      "application/vnd.google.colaboratory.intrinsic+json": {
       "type": "dataframe",
       "variable_name": "flops",
       "summary": "{\n  \"name\": \"flops\",\n  \"rows\": 7,\n  \"fields\": [\n    {\n      \"column\": \"Name\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 7,\n        \"samples\": [\n          \"Forward Attention / weights\",\n          \"Forward Attention /o weights\",\n          \"Total backward\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \" Value\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 74997382398765.53,\n        \"min\": 1070000000000.0,\n        \"max\": 201000000000000.0,\n        \"num_unique_values\": 7,\n        \"samples\": [\n          11000000000000.0,\n          8800000000000.0,\n          134000000000000.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \" Notes\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 5,\n        \"samples\": [\n          \"2 Dot product in attention (softmax ignored)\",\n          \"\",\n          \"3 * 2BSDF\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
      }
     },
     "metadata": {},
     "execution_count": 21
    }
   ]
  },
  {
   "cell_type": "code",
   "source": [],
   "metadata": {
    "id": "9gDTLDgUVx1P",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1755041550404,
     "user_tz": 180,
     "elapsed": 1,
     "user": {
      "displayName": "Melissa Mendonca",
      "userId": "15270279630813034280"
     }
    }
   },
   "id": "9gDTLDgUVx1P",
   "execution_count": 21,
   "outputs": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  },
  "colab": {
   "provenance": []
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}